---
title: "Cleaning Script for Assessing Public Value Failure in Government Adoption of AI"
author: "Daniel Schiff, Kaylyn Jackson Schiff, and Patrick Pierson"
date: "2021"
output: pdf_document
editor_options: 
  chunk_output_type: console
---

#####Setup Chunk#####
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
# Attach packages with library(): it stops with an error when a package is
# missing, whereas require() only returns FALSE and lets the script fail later
# with a less obvious message.
library(rmarkdown)
library(tidyverse)
library(haven)
library(skimr)
```

#####Import Data#####
```{r import data}
# Load the raw SPSS export, then keep only rows whose Finished flag equals 1.
ads_raw <- read_spss("Data/ads_raw.sav")
ads <- as_tibble(filter(ads_raw, Finished == 1))
```

#####Check Quality of Data#####
```{r quality check}
# Reproduce Figure SI8: histogram of survey completion time in minutes.
min(ads$Duration__in_seconds_)
ads$duration_min <- ads$Duration__in_seconds_ / 60 # seconds -> minutes
# The dashed vertical line marks the mean completion time.
hist <- ggplot(ads, aes(x = duration_min)) +
  geom_histogram(binwidth = 1, color = "black", fill = "gray") +
  geom_vline(aes(xintercept = mean(duration_min)), linetype = "dashed") +
  xlab("Duration in Minutes") +
  ylab("Number of Respondents") +
  ggtitle("Time Taken on Survey as Measure of Quality")
# Save the figure by passing the plot object explicitly instead of relying on
# ggsave()'s implicit last_plot() default, so the saved file is always `hist`.
ggsave("Figures/fig_SI8.png", plot = hist)
hist
mean(ads$duration_min) # average time taken was almost 5 minutes
median(ads$duration_min) # 4 min median time
quantile(ads$duration_min, 0.25) # 3 minutes is 1st quartile

# For quality check, filter out people who took less than 3 minutes on the
# survey and export a separate clean file.
attentive <- FALSE # switch to TRUE in order to output attentive sample instead
if (isTRUE(attentive)) {
  ads <- filter(ads, duration_min >= 3)
}
```

#####Subset Data#####
```{r subset data}
# Keep (and rename) only the columns used in the analysis.
ads <- ads %>%
  select(
    # respondent data
    ResponseId, Block_ID, Wing_Order, Vignette,
    # outcomes
    Feeling = W2_Feeling_1, Trust = W2_Trust_1, Quality = W2_Quality_1,
    Impact = W2_Impact,
    # experimental outcomes
    Petition = W2_Petition, Meeting = W2_Meeting,
    # demographics
    Pol_7, Knowledge = W2_Knowledge, Gender, Birthyear, Race, Educ,
    Marital_Status, Income, Employment_Status, Industry
  )

var_names <- colnames(ads) # store variable names
# Store each column's "label" attribute (the survey question). exact = TRUE
# matters: `attributes(x)$label` partially matches, so a column that has a
# "labels" attribute but no "label" would silently return its value labels.
var_labels <- lapply(ads, function(x) attr(x, "label", exact = TRUE))
# Recode every column after the first four (respondent data) as numeric.
ads[, -c(1:4)] <- lapply(ads[, -c(1:4)], as.numeric)

sapply(ads, class) # check class
skim(ads) # summary statistics
```

#####Recode Variables#####
```{r recode variables}
# First, create new variable, "Employment", combining the information from
# Employment_Status and Industry to avoid collinearity problems.
#
# Industry is numeric at this point, so the rule is "use the industry code when
# one was recorded" -- tested with !is.na() rather than by comparing a number
# to the string "Unemployed" (which only worked through implicit coercion).
# When Industry is missing, Employment_Status codes 4-10 are mapped onto the
# extra codes 19-25. Rows matching no rule get NA.
ads <- mutate(ads, Employment = case_when(
  !is.na(Industry) ~ Industry,
  is.na(Industry) & Employment_Status == 4 ~ 19,
  is.na(Industry) & Employment_Status == 5 ~ 20,
  is.na(Industry) & Employment_Status == 6 ~ 21,
  is.na(Industry) & Employment_Status == 7 ~ 22,
  is.na(Industry) & Employment_Status == 8 ~ 23,
  is.na(Industry) & Employment_Status == 9 ~ 24,
  is.na(Industry) & Employment_Status == 10 ~ 25
))

names(ads)[1:4] # meta-data

# Turn the block and vignette identifiers into unordered factors.
ads <- ads %>%
  mutate(
    # "W Ind Male" is moved to the front so it acts as the reference level.
    Block_ID = relevel(factor(Block_ID, ordered = FALSE), ref = "W Ind Male"),
    # Levels are listed with Health Control first (reference category).
    Vignette = factor(
      Vignette,
      levels = c(
        "W2_Health_Control", "W2_Health_Bias",
        "W2_Health_Transparency", "W2_Health_Agency",
        "W2_Courts_Control", "W2_Courts_Bias",
        "W2_Courts_Transparency", "W2_Courts_Agency"
      ),
      labels = c(
        "Health Control", "Health Bias",
        "Health Transparency", "Health Responsiveness",
        "Courts Control", "Courts Bias",
        "Courts Transparency", "Courts Responsiveness"
      ),
      ordered = FALSE
    )
  )


names(ads)[11:20] # demographics

ads <- ads %>%
  # Collapse the 7-point Pol_7 item into three groups, then drop the original.
  mutate(
    Pol = factor(
      case_when(
        Pol_7 %in% 1:3 ~ 1, # "Liberal"
        Pol_7 == 4 ~ 2,     # "Independent"
        Pol_7 %in% 5:7 ~ 3  # "Conservative"
      ),
      levels = c(2, 1, 3), # Independent as reference category
      labels = c("Independent", "Liberal", "Conservative"),
      ordered = FALSE
    )
  ) %>%
  select(-Pol_7) %>%
  # Label the knowledge and gender codes in place.
  mutate(
    Knowledge = factor(
      Knowledge,
      levels = 1:4,
      labels = c("Not at all", "A little", "A moderate amount", "A lot"),
      ordered = FALSE
    ),
    Gender = factor(
      Gender,
      levels = 1:3,
      labels = c("Male", "Female", "Other"),
      ordered = FALSE
    )
  ) %>%
  # Bin birth year into generation groups (1 = most recent birth years), then
  # drop the original column. Label spellings are kept exactly as they were.
  mutate(
    Age = factor(
      case_when(
        between(Birthyear, 1900, 1945) ~ 5,
        between(Birthyear, 1946, 1964) ~ 4,
        between(Birthyear, 1965, 1980) ~ 3,
        between(Birthyear, 1981, 1996) ~ 2,
        Birthyear >= 1997 ~ 1
      ),
      levels = 1:5,
      labels = c("Gen Z", "Millenial", "Gen X", "Boomers", "Silent"),
      ordered = FALSE
    )
  ) %>%
  select(-Birthyear)

# Collapse race, education and income into three categories each and label
# marital status; every column is replaced in place by an unordered factor.
ads <- ads %>%
  mutate(
    Race = factor(
      case_when(
        Race == 1 ~ 1, # White
        Race == 2 ~ 2, # Black
        # "Hispanic or Latino/a", "Asian", "American Indian or Alaska Native",
        # "Native Hawaiian or Other Pacific Islander"
        Race >= 3 ~ 3
      ),
      levels = 1:3,
      labels = c("White", "Black or African American", "Other Race"),
      ordered = FALSE
    ),
    Educ = factor(
      case_when(
        Educ <= 2 ~ 1,          # High school graduate or less
        Educ %in% c(3, 4) ~ 2,  # Some college or Associate degree
        Educ >= 5 ~ 3           # Bachelor's or higher
      ),
      levels = 1:3,
      labels = c(
        "High school graduate or less", "Some college", "Bachelor's or higher"
      ),
      ordered = FALSE
    ),
    Marital_Status = factor(
      Marital_Status,
      levels = 1:5,
      labels = c(
        "Married", "Widowed", "Divorced", "Separated", "Never Married"
      ),
      ordered = FALSE
    ),
    Income = factor(
      case_when(
        Income <= 3 ~ 1,            # Low income (<$30,000)
        between(Income, 4, 7) ~ 2,  # Middle income ($30,000 to $69,999)
        Income >= 8 ~ 3             # High income (>$70,000)
      ),
      levels = 1:3,
      labels = c("Low income", "Middle income", "High income"),
      ordered = FALSE
    )
  )

# Label the ten employment-status codes.
ads$Employment_Status <- factor(
  ads$Employment_Status,
  levels = 1:10,
  labels = c(
    "Employed for wages", "Employed for salary", "Self-employed",
    "Out of work and looking for work",
    "Out of work but not currently looking for work",
    "A homemaker", "A student", "Military", "Retired", "Unable to work"
  ),
  ordered = FALSE
)

# Missing industry is coded 19 ("Unemployed") before labelling codes 1-19.
ads$Industry <- coalesce(ads$Industry, 19)
ads$Industry <- factor(
  ads$Industry,
  levels = 1:19,
  labels = c(
    "Agriculture, Forestry, Fishing and Hunting",
    "Mining, Quarrying, and Oil and Gas Extraction", "Utilities",
    "Construction", "Wholesale Trade", "Information", "Finance and Insurance",
    "Real Estate and Rental and Leasing",
    "Professional, Scientific, and Technical Services",
    "Management of Companies and Enterprises",
    "Administrative and Support and Waste Management and Remediation Services",
    "Educational Services", "Health Care and Social Assistance",
    "Arts, Entertainment, and Recreation", "Accommodation and Food Services",
    "Other Services (except Public Administration)", "Public Administration",
    "Other Industry", "Unemployed"
  ),
  ordered = FALSE
)

# Employment codes 1-18 carry the first 18 industry labels (everything except
# "Unemployed"); codes 19-25 carry the labels of employment-status codes 4-10.
# Both label sets are taken from the factors built just above.
ads$Employment <- factor(
  ads$Employment,
  levels = 1:25,
  labels = c(
    head(levels(ads$Industry), -1),
    levels(ads$Employment_Status)[4:10]
  ),
  ordered = FALSE
)

names(ads)[5:10] # outcomes

# Reverse the 7-point Likert scale for the experimental outcomes:
# 1 <-> 7, 2 <-> 6, 3 <-> 5, 4 stays 4. Any value outside 1-7 becomes NA.
ads <- ads %>%
  mutate(across(
    c(Petition, Meeting),
    function(x) if_else(x %in% 1:7, 8 - x, NA_real_)
  ))

names(ads)
# Put the columns into their final order.
ads <- select(
  ads,
  ResponseId, Block_ID, Wing_Order, Vignette,
  Feeling, Trust, Quality, Impact,
  Petition, Meeting,
  Pol, Knowledge, Gender, Age, Race, Educ,
  Marital_Status, Income, Employment_Status, Industry,
  Employment
)
sapply(ads, class)
skim(ads)
```

#####Create Mechanism Variable#####
```{r combining treatments across policy sectors}
# Relabel the "Health ..." vignettes as "Child Welfare ...", then derive the
# treatment mechanism by stripping the policy-sector prefix from the vignette.
ads <- ads %>%
  mutate(
    # Any value outside the eight listed levels becomes NA.
    Vignette = factor(
      sub("^Health ", "Child Welfare ", as.character(Vignette)),
      levels = c(
        "Child Welfare Control", "Child Welfare Bias",
        "Child Welfare Transparency", "Child Welfare Responsiveness",
        "Courts Control", "Courts Bias",
        "Courts Transparency", "Courts Responsiveness"
      )
    ),
    # Uses the relabelled Vignette from the line above.
    Mechanism = factor(
      sub("^(Child Welfare|Courts) ", "", as.character(Vignette)),
      levels = c("Control", "Bias", "Transparency", "Responsiveness")
    )
  )
```

#####Export File#####
```{r export file}
# Save the cleaned data. The attentive-only sample goes to its own file so it
# never overwrites the full-sample file. isTRUE() avoids comparing against the
# reassignable shorthands T / F.
if (isTRUE(attentive)) {
  save(ads, file = "Data/ads_clean_attentive.RData")
} else {
  save(ads, file = "Data/ads_clean.RData")
}
```



